Setup

Load R libraries

library(data.table)
library(ggplot2)
library(ggpubr)
library(tidyr)
library(limma)
library(biomaRt)
library(fgsea)
library(goseq)

# Use the classic ggplot2 theme as the default for all subsequent plots.
theme_set(theme_classic())
# Graph weight taken from `params` (presumably the R Markdown parameter
# list -- confirm against the document header). It is used below to build
# the names of the input files.
graph_weight = params$graph_weight
# Echo the value so that it is recorded in the rendered output.
graph_weight
## [1] "10.0"

Check enrichment of gene sets

Read in gene info and gene set assignments

# Cell type and graph weight together form the tag of the input files.
cell_type_name = "CD8T"
file_tag = sprintf("%s_%s", cell_type_name, graph_weight)

# All assayed genes, one entry per line of the file.
assayed_genes = scan(file = sprintf("output/gene_list_%s.txt", file_tag),
                     what = character(), sep = "\n")

# Gene sets, one comma-separated set per line of the file.
gene_sets = scan(file = sprintf("output/name_s_%s.txt", file_tag),
                 what = character(), sep = "\n")

# Split every line into its genes; each list element is named by the raw
# line it was parsed from.
gene_sets = setNames(strsplit(gene_sets, split = ","), gene_sets)
# Number of genes per set, without the (long) element names.
n_genes   = unname(lengths(gene_sets))
summary(n_genes)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    24.0    25.0    26.0    25.8    27.0    28.0
length(n_genes)
## [1] 40
sort(n_genes)
##  [1] 24 24 24 24 25 25 25 25 25 25 25 25 25 25 25 25 25 26 26 26 26 26 26 26 26
## [26] 26 26 26 27 27 27 27 27 27 27 27 27 27 27 28
sum(n_genes)
## [1] 1032

Find gene symbols

Look up gene symbols with biomaRt.

All gene symbols that can be found with biomaRt are consistent with the symbols in our data, so there is no need to run this chunk.

# Connect to the Ensembl BioMart human gene dataset.
ensembl = useMart("ensembl", dataset = "hsapiens_gene_ensembl")

# Query BioMart with the assayed gene names and retrieve, for every match,
# the HGNC symbol together with the external gene name.
gene_BM = getBM(attributes = c("hgnc_symbol", "external_gene_name"), 
                filters = "external_gene_name", 
                values = assayed_genes, 
                mart = ensembl)
# Compare the number of queried genes with the size of the returned table.
length(assayed_genes)
dim(gene_BM)
gene_BM[1:2,]

# How many of the assayed genes were returned by BioMart?
table(assayed_genes %in% gene_BM$external_gene_name)

# Gene names that occur in more than one returned row.
t1 = table(gene_BM$external_gene_name)
dup = names(t1)[t1 > 1]
gene_BM[gene_BM$external_gene_name %in% dup,]

# Rows in which the HGNC symbol differs from the external gene name.
table(gene_BM$hgnc_symbol == gene_BM$external_gene_name)
w2kp = which(gene_BM$hgnc_symbol != gene_BM$external_gene_name)
gene_BM[w2kp,]

Find gene symbols using the alias2Symbol function from limma.

# Map every assayed gene name to a gene symbol with limma's alias2Symbol.
# The lookup is done one gene at a time so that each result can be stored
# at the position of the gene it belongs to.
#
# a2s[i] is the first symbol returned for assayed_genes[i], or NA when no
# symbol was returned. Typed NA_character_ keeps a2s a character vector
# even when no gene can be mapped.
a2s = rep(NA_character_, length(assayed_genes))
# seq_along() yields an empty sequence for an empty gene list, whereas
# 1:length() would iterate over c(1, 0).
for(i in seq_along(assayed_genes)){
  gi = assayed_genes[i]
  ai = alias2Symbol(gi)
  # Report ambiguous names (several candidate symbols) for manual review;
  # only the first candidate is kept below.
  if(length(ai) > 1){
    print(gi)
    print(ai)
  }
  # Indexing an empty result with [1] gives NA, i.e. "no symbol found".
  a2s[i] = ai[1]
}
## [1] "C7orf55"
## [1] "FMC1-LUC7L2" "FMC1"       
## [1] "MARS"
## [1] "MARS1" "SLA2" 
## [1] "QARS"
## [1] "EPRS1" "QARS1"
## [1] "APITD1"
## [1] "CENPS-CORT" "CENPS"     
## [1] "HIST1H2BC"
## [1] "H2BC5" "H2BC4"
# Number of assayed genes for which no symbol was found (TRUE).
table(is.na(a2s))
## 
## FALSE  TRUE 
##  1120   139
# Agreement between the limma symbol and the name used in the data; genes
# without a limma symbol are counted under <NA>.
table(a2s == assayed_genes, useNA = 'ifany')
## 
## FALSE  TRUE  <NA> 
##    71  1049   139
# Keep the name used in the data and the limma symbol side by side.
gene_info = data.table(sym_in_data = assayed_genes, sym_limma = a2s)

# List the genes whose limma symbol differs from the name in the data
# (genes with a missing limma symbol are not selected by this comparison).
gene_info[sym_in_data != sym_limma,]
##      sym_in_data   sym_limma
##  1:        ZNRD1      POLR1H
##  2:      C2orf43        LDAH
##  3:       FAM45A     DENND10
##  4:        MTERF      MTERF1
##  5:        NARG2        ICE2
##  6:     C6orf203      MTRES1
##  7:       HRSP12        RIDA
##  8:        FOPNL       CEP20
##  9:         WARS       WARS1
## 10:         LINS       LINS1
## 11:       RRNAD1    METTL25B
## 12:      FAM122B      PABIR2
## 13:       WDYHV1       NTAQ1
## 14:    HIST1H2BD       H2BC5
## 15:       AGPAT6       GPAT4
## 16:     C1orf123        CZIB
## 17:     KIAA1841       SANBR
## 18:      C2orf44        WDCP
## 19:      TMEM155      SMIM43
## 20:      C7orf55 FMC1-LUC7L2
## 21:      C9orf89      CARD19
## 22:     C11orf82       DDIAS
## 23:     XRCC6BP1       ATP23
## 24:         MARS       MARS1
## 25:     C19orf55     PROSER3
## 26:    HIST1H2AE       H2AC8
## 27:         QARS       EPRS1
## 28:       APITD1  CENPS-CORT
## 29:      CCDC101       SGF29
## 30:        PDDC1       GATD1
## 31:      EFCAB4A     CRACR2B
## 32:      EIF2S3L     EIF2S3B
## 33:    HIST1H2BC       H2BC5
## 34:          WRB        GET1
## 35:     TMEM194B       NEMP2
## 36:     HIST1H4C        H4C3
## 37:     HIST1H4J       H4C11
## 38:       ZNF720      KRBOX5
## 39:    HIST1H2BH       H2BC9
## 40:       FAM49A       CYRIA
## 41:        TTC37       SKIC3
## 42:      C1orf85        GLMP
## 43:     C9orf114      SPOUT1
## 44:   HIST2H2AA4      H2AC19
## 45:    HIST2H2BF      H2BC18
## 46:    C10orf128     TMEM273
## 47:     TCTEX1D2     DYNLT2B
## 48:     C17orf89     NDUFAF8
## 49:        PHBP3      PHB1P3
## 50:      SEPT7P7   SEPTIN7P7
## 51:    LINC00493      SMIM26
## 52:    HIST1H2BN      H2BC15
## 53:       ATP5J2      ATP5MF
## 54:        AKAP2  PALM2AKAP2
## 55:        WDR65      CFAP57
## 56:    SRP14-AS1    SRP14-DT
## 57: CTC-436P18.1  SMIM15-AS1
## 58:     HIST1H3G        H3C8
## 59:     EFTUD1P1      EFL1P1
## 60:     ATP5A1P3   ATP5F1AP3
## 61:     CCDC109B        MCUB
## 62:      C1orf63       RSRP1
## 63:       TMEM66       SARAF
## 64:       PCNXL2       PCNX2
## 65:         ACRC        GCNA
## 66:       AMICA1        JAML
## 67:       PRMT10       PRMT9
## 68:      FAM102A       EEIG1
## 69:         SELM     SELENOM
## 70:       NBPF16      NBPF15
## 71:       RPL9P9      RPL9P8
##      sym_in_data   sym_limma
# Final symbol used for the enrichment analysis: start from the name used
# in the data and switch to the limma symbol only where limma returned a
# different symbol. which() drops the NA comparisons, so genes without a
# limma symbol keep the name used in the data.
gene_info[, gene_symbol := sym_in_data]
gene_info[which(sym_in_data != sym_limma), gene_symbol := sym_limma]

dim(gene_info)
## [1] 1259    3
gene_info[1:5,]
##    sym_in_data sym_limma gene_symbol
## 1:    C1orf112  C1orf112    C1orf112
## 2:      MAD1L1    MAD1L1      MAD1L1
## 3:        ICA1      ICA1        ICA1
## 4:     NDUFAF7   NDUFAF7     NDUFAF7
## 5:         ST7       ST7         ST7
# Check whether the renaming made two different genes share one symbol:
# count how often every final symbol occurs.
t1 = table(gene_info$gene_symbol)
table(t1)
## t1
##    1    2 
## 1255    2
# Show the genes behind the symbols that occur twice.
gene_info[gene_symbol %in% names(t1)[t1 == 2],]
##    sym_in_data sym_limma gene_symbol
## 1:   HIST1H2BD     H2BC5       H2BC5
## 2:   HIST1H2BC     H2BC5       H2BC5
## 3:      RPL9P8    RPL9P8      RPL9P8
## 4:      RPL9P9    RPL9P8      RPL9P8
# Manual curation of the symbols chosen automatically by alias2Symbol.
#
# (1) Resolve the two symbols that ended up assigned to two genes each:
#     HIST1H2BC takes the second candidate reported by alias2Symbol (H2BC4),
#     and RPL9P9 keeps the name used in the data.
gene_info[sym_in_data == "HIST1H2BC", gene_symbol:="H2BC4"]
gene_info[sym_in_data == "RPL9P9", gene_symbol:="RPL9P9"]
# (2) For the other ambiguous names the first candidate returned by
#     alias2Symbol is not the gene itself: QARS was mapped to EPRS1 and
#     C7orf55 / APITD1 to the read-through loci FMC1-LUC7L2 / CENPS-CORT.
#     Use the second candidate instead.
#     NOTE(review): confirm these three mappings against HGNC.
gene_info[sym_in_data == "C7orf55", gene_symbol:="FMC1"]
gene_info[sym_in_data == "QARS", gene_symbol:="QARS1"]
gene_info[sym_in_data == "APITD1", gene_symbol:="CENPS"]

# gene_symbol is later used to name the gene vector given to goseq, so it
# should identify every gene uniquely.
if(anyDuplicated(gene_info$gene_symbol) > 0){
  warning("gene_symbol still contains duplicated values after manual curation")
}

Prepare gene set information

Gene set annotations (defined by gene symbols) were downloaded from the MSigDB website.

# MSigDB gene set collections (gmt files, gene symbols) used for the
# enrichment analysis, keyed by a short collection label.
gmtfile = list(
  reactome = "../Annotation/c2.cp.reactome.v2023.2.Hs.symbols.gmt",
  go_bp    = "../Annotation/c5.go.bp.v2023.2.Hs.symbols.gmt",
  immune   = "../Annotation/c7.all.v2023.2.Hs.symbols.gmt"
)

# Read every collection; the result keeps the collection labels as names.
pathways = lapply(gmtfile, gmtPathways)

names(pathways)
## [1] "reactome" "go_bp"    "immune"
# Number of gene sets per collection.
lengths(pathways)
## reactome    go_bp   immune 
##     1692     7647     5219

Keep only the gene sets that contain between 10 and 500 genes (inclusive).

# Deciles of the gene set sizes, separately for every collection.
lapply(pathways, function(sets){
  set_sizes = lengths(sets)
  quantile(set_sizes, probs = seq(0, 1, by = 0.1), na.rm = TRUE)
})
## $reactome
##     0%    10%    20%    30%    40%    50%    60%    70%    80%    90%   100% 
##    5.0    7.0    9.0   12.0   17.0   23.0   31.0   44.0   71.8  120.9 1463.0 
## 
## $go_bp
##     0%    10%    20%    30%    40%    50%    60%    70%    80%    90%   100% 
##    5.0    6.0    8.0   10.0   14.0   19.0   29.0   46.0   80.8  183.0 1966.0 
## 
## $immune
##   0%  10%  20%  30%  40%  50%  60%  70%  80%  90% 100% 
##    5  162  193  197  199  199  200  200  200  200 1992
# Within every collection keep the gene sets with 10 to 500 genes.
pathways = lapply(pathways, function(sets){
  set_sizes = lengths(sets)
  sets[set_sizes >= 10 & set_sizes <= 500]
})

Conduct enrichment analysis

dim(gene_info)
## [1] 1259    3
gene_info[1:2,]
##    sym_in_data sym_limma gene_symbol
## 1:    C1orf112  C1orf112    C1orf112
## 2:      MAD1L1    MAD1L1      MAD1L1
# Maximum number of significant categories kept per gene set and collection.
max_n2kp = 10

# Enrichment results; grows into a nested list indexed as
# goseq_res[["set_<k>"]][[<collection>]] and only holds combinations with
# at least one category at FDR < 0.05.
goseq_res = NULL

# The gene -> category mapping depends only on the gene set collection, so
# it is built once here instead of once per gene set and collection.
gene2cat_by_db = lapply(pathways, goseq:::reversemapping)

# seq_along() also behaves correctly when there are no gene sets.
for(k in seq_along(gene_sets)){
  # Skip gene sets with fewer than 10 genes.
  if(length(gene_sets[[k]]) < 10) { next }
  
  print(k)
  set_k = paste0("set_", k)
  print(gene_sets[[k]])
  
  # Indicator over all assayed genes: TRUE for members of gene set k.
  # Membership is matched on the name used in the data, while the vector
  # is named by the curated symbol that is matched to the annotations.
  genes = gene_info$sym_in_data %in% gene_sets[[k]]
  names(genes) = gene_info$gene_symbol
  
  # Probability weighting function of goseq; it does not depend on the
  # collection and is therefore computed once per gene set.
  pwf = nullp(genes, "hg38", "geneSymbol")
  
  for(k1 in names(pathways)){
    res1 = goseq(pwf, "hg38", "geneSymbol", 
                 gene2cat=gene2cat_by_db[[k1]])
    # Benjamini-Hochberg correction across all categories of the collection.
    res1$FDR  = p.adjust(res1$over_represented_pvalue, method="BH")
    
    # Number of significant categories; missing FDR values are not counted.
    nD = sum(res1$FDR < 0.05, na.rm = TRUE)
    
    if(nD > 0){
      # Keep the most significant categories (at most max_n2kp) and turn
      # the category identifiers into short, readable labels.
      res1 = head(res1[order(res1$FDR),], min(nD, max_n2kp))
      res1$category = gsub("REACTOME_|GOBP_", "", res1$category)
      res1$category = gsub("_", " ", res1$category)
      res1$category = tolower(res1$category)
      res1$category = substr(res1$category, start=1, stop=120)
      goseq_res[[set_k]][[k1]] = res1
    }
  }
}
## [1] 1
##  [1] "MAP4K5"   "PAFAH1B3" "SPTLC2"   "CAPN15"   "PIEZO1"   "RASSF4"  
##  [7] "CACYBP"   "CCR2"     "NUP85"    "UROD"     "NPHP4"    "HRSP12"  
## [13] "ANXA1"    "ASNSD1"   "MZT2B"    "CENPH"    "RNF207"   "C1orf123"
## [19] "RNF25"    "LRRC28"   "UBB"      "HSPA1B"   "MYCBP"    "TMEM63A"

## [1] 2
##  [1] "IL4R"        "UTP6"        "ACVR1"       "SMUG1"       "GLUL"       
##  [6] "GNG4"        "ZNF816"      "RPS27AP2"    "CTB-33G10.1" "TXK"        
## [11] "ARRDC2"      "RASGRF2"     "EGR1"        "ACVR2A"      "FOSB"       
## [16] "JUND"        "SKIL"        "SORL1"       "SGSM2"       "GNAQ"       
## [21] "LDLRAP1"     "DHRS3"       "TC2N"        "TMEM150A"    "DNHD1"

## [1] 3
##  [1] "CCL3"     "TIMM21"   "WDR7"     "THOC5"    "PON2"     "KLRB1"   
##  [7] "BRD8"     "EMG1"     "CCL4"     "ACTR10"   "KLRC1"    "GCHFR"   
## [13] "CXCL13"   "PANK4"    "C19orf47" "EXTL2"    "GTSF1"    "METTL18" 
## [19] "WRB"      "COA3"     "CMC1"     "CCL4L2"   "MSH5"     "CCL4L1"  
## [25] "CCL3L1"   "KLRC2"

## [1] 4
##  [1] "ID3"            "PARP2"          "MMS22L"         "PLIN2"         
##  [5] "ATF3"           "HIST1H2BC"      "PLA2G6"         "HIST2H2AA4"    
##  [9] "HIST1H2BN"      "HIST1H3G"       "RP11-386G11.10" "IFRD1"         
## [13] "RGCC"           "NR4A3"          "RASGEF1B"       "NR4A2"         
## [17] "TIPARP"         "PRMT10"         "PFKFB3"         "RBKS"          
## [21] "CHD2"           "ANKRD37"        "MTND1P23"       "PSMD6-AS2"     
## [25] "KCNQ1OT1"       "RP11-434H6.7"

## [1] 5
##  [1] "ZNF419"        "FAM45A"        "RNF121"        "NDUFAF6"      
##  [5] "TMSB15B"       "CCDC28B"       "ZBTB39"        "RHEBL1"       
##  [9] "LRRC45"        "RP11-23P13.6"  "ZNF93"         "ENTPD5"       
## [13] "ZNF511"        "ZNF525"        "RP11-122G18.7" "AC115617.2"   
## [17] "RP11-262H14.3" "AC007041.2"    "AC004069.2"    "MIR146A"      
## [21] "RPPH1"         "PMF1-BGLAP"    "RP11-209D14.4" "AL358333.1"   
## [25] "CTD-3214H19.4" "CTD-2545M3.2"  "RP11-282O18.6"

## [1] 6
##  [1] "NT5C2"         "PMM1"          "GALK1"         "GNPTAB"       
##  [5] "STYXL1"        "PGPEP1"        "XAF1"          "AHI1"         
##  [9] "CLSTN3"        "WARS"          "TATDN1"        "TMPRSS3"      
## [13] "DNAJC18"       "TMEM107"       "SPATA13"       "PSMG1"        
## [17] "TNFRSF18"      "EFCAB7"        "TRGV3"         "RP11-64B16.2" 
## [21] "RP11-247I13.3" "RP5-1053E7.3"  "UBXN2B"        "RP11-782C8.1" 
## [25] "PSMC1P9"       "EFTUD1P1"

## [1] 7
##  [1] "MCM9"          "TMEM241"       "KHDC1"         "IRF8"         
##  [5] "MPPE1"         "SLAMF8"        "FCRL3"         "MGAT4B"       
##  [9] "AMDHD2"        "LZTFL1"        "PRIMPOL"       "GNGT2"        
## [13] "TRIM68"        "NINJ2"         "ZBTB8OS"       "ABHD16B"      
## [17] "PTGES3P2"      "PHBP3"         "GS1-124K5.2"   "PA2G4P4"      
## [21] "NPM1P34"       "BDH2P1"        "RP11-552M11.4" "MS4A1"        
## [25] "SERHL2"

## [1] 8
##  [1] "TRPT1"         "TRBV4-2"       "TRAV12-2"      "RP11-383G10.3"
##  [5] "RPS19P1"       "NPM1P19"       "Z97634.3"      "RP11-32B5.1"  
##  [9] "NDUFA9P1"      "RP11-33B1.1"   "HNRNPA1P50"    "DGKA"         
## [13] "TCF7"          "CCR7"          "PCNXL2"        "CDC42EP3"     
## [17] "ZBTB20"        "DENND5A"       "POU6F1"        "SELL"         
## [21] "RPS26P6"       "RP11-91K8.4"   "WHAMMP2"       "TMEM9B-AS1"   
## [25] "RP11-747H7.3"  "CTD-2017D11.1" "RP11-51J9.5"

## [1] 9
##  [1] "APH1B"         "SEC24D"        "ATF4P3"        "RPL7AP31"     
##  [5] "HNRNPA1P55"    "HNRNPA1P16"    "APBA2"         "TMEM66"       
##  [9] "ZMAT1"         "NPIPB3"        "AC018720.10"   "EIF1"         
## [13] "NR1D2"         "NPIPB4"        "HLA-J"         "RP11-572P18.1"
## [17] "MTND2P28"      "CTD-2328D6.1"  "RPL41"         "ACTBP12"      
## [21] "RPS3AP6"       "HLA-W"         "NPIPB5"        "MTATP6P1"     
## [25] "RP11-320M16.1" "CTD-2031P19.4"

## [1] 10
##  [1] "SMG6"      "DUSP12"    "MICALL1"   "CNOT2"     "RCOR3"     "MTFR1L"   
##  [7] "CCDC77"    "METTL8"    "FOPNL"     "CASP1"     "KSR1"      "VPS37A"   
## [13] "TMEM208"   "TMEM126A"  "CEP63"     "CNOT10"    "CEP57L1"   "RN7SL749P"
## [19] "TNRC6C"    "CLDND1"    "TNRC6B"    "CNOT6L"    "BTG2"      "RALGAPA1" 
## [25] "PLA2G4B"

## [1] 11
##  [1] "POLA2"      "USP28"      "WDR3"       "WIPI1"      "PIK3C3"    
##  [6] "ULK2"       "WDR13"      "IFIT3"      "IFIT2"      "L3HYPDH"   
## [11] "RSAD2"      "BCAS3"      "PELP1"      "BAP1"       "WDR41"     
## [16] "PHAX"       "ATG13"      "IFIT1"      "PIK3R4"     "MBD5"      
## [21] "CATSPER2P1" "RAB12"      "C9orf72"    "FBXO32"

## [1] 12
##  [1] "CD200"         "OCEL1"         "MS4A6A"        "PDE6B"        
##  [5] "NUDT7"         "TMEM138"       "TMEM155"       "C9orf89"      
##  [9] "GSTM4"         "NME6"          "EEF1GP1"       "TMEM194B"     
## [13] "RSC1A1"        "AC009403.2"    "RP3-340B19.5"  "PGAM4"        
## [17] "RP11-247I13.6" "AC090804.1"    "BHLHE40-AS1"   "GAPDHP2"      
## [21] "FAM21FP"       "KB-1507C5.2"   "SF3A3P2"       "AP000620.1"   
## [25] "MTRNR2L3"      "RP11-493L12.6" "AC006129.4"

## [1] 13
##  [1] "TM7SF3"        "MAP3K13"       "POLD3"         "AAAS"         
##  [5] "TIMM9"         "PLA2G15"       "POP4"          "FAHD2A"       
##  [9] "GALE"          "NR2C1"         "TIMM10"        "FHOD1"        
## [13] "UCK2"          "GALK2"         "XRCC6BP1"      "MEI1"         
## [17] "POP5"          "CTDNEP1"       "DALRD3"        "PIGW"         
## [21] "WDR53"         "UCKL1"         "ABHD14A"       "RP11-304L19.5"
## [25] "GGT7"          "OBSCN"

## [1] 14
##  [1] "CRTAM"         "TFDP2"         "IRF4"          "KIR3DL1"      
##  [5] "TMEM134"       "AC004840.9"    "SMIM19"        "FCRL6"        
##  [9] "SLC9A9"        "FAM49A"        "ZNF649"        "NPM1P32"      
## [13] "CD27-AS1"      "RNU2-63P"      "RP11-567G24.1" "PATL2"        
## [17] "SEPT7P7"       "GYG1P3"        "BTF3P10"       "ERP29P1"      
## [21] "RP11-254B13.4" "AC011747.4"    "STARD4-AS1"    "GAPDHP38"     
## [25] "RNU1-125P"     "AL161784.1"    "AOAH"

## [1] 15
##  [1] "EVC"           "LTBP4"         "ARHGEF18"      "SCRN2"        
##  [5] "GPN2"          "OXSM"          "FAM122B"       "DBF4B"        
##  [9] "TAF10"         "CCDC101"       "ZBED2"         "RP11-773D16.1"
## [13] "RP11-119F19.2" "AC093391.2"    "MAK"           "MAN1C1"       
## [17] "ATXN7"         "POLR2J3"       "YPEL2"         "TTC3"         
## [21] "PLGLB1"        "NELL2"         "MIR4461"       "GPRASP1"      
## [25] "RP11-514P8.6"  "LINC00944"

## [1] 16
##  [1] "STAU2"     "NOP16"     "YBX3"      "NTHL1"     "SART3"     "KHSRP"    
##  [7] "FMR1"      "DOT1L"     "DHX58"     "DKC1"      "APTX"      "UTP23"    
## [13] "HIST1H2BD" "MARS"      "KBTBD2"    "PCGF5"     "ZFP62"     "ZNF770"   
## [19] "C9orf114"  "ZBTB48"    "MIR29B1"   "PDCD4"     "ZNF589"    "TSPYL2"

## [1] 17
##  [1] "C1orf112" "VPS41"    "AGA"      "BCL3"     "RNF13"    "CDKL1"   
##  [7] "GSR"      "TYK2"     "SNX8"     "NCOA7"    "FIGNL1"   "TTLL4"   
## [13] "IDH1"     "TMOD3"    "TAMM41"   "WDR43"    "COG5"     "ETFDH"   
## [19] "ZNF431"   "ANKRD28"  "TREX1"    "IFRD2"    "PPIL3"    "PLXND1"  
## [25] "SELM"

## [1] 18
##  [1] "MAD1L1"   "RNF10"    "PTPN18"   "GABPB1"   "SMC4"     "C3orf14" 
##  [7] "STK25"    "CKS2"     "CCNB1"    "PMAIP1"   "HIRIP3"   "CCNB2"   
## [13] "ANAPC10"  "CTSB"     "TBCA"     "RRM2"     "ASB8"     "EDARADD" 
## [19] "TDRD7"    "HIST1H4C" "DDX3Y"    "FAM117B"  "NPAT"     "PLK3"    
## [25] "TSC22D2"

## [1] 19
##  [1] "DYRK4"            "ZNF684"           "C2orf43"          "DTD2"            
##  [5] "SLC35A5"          "ZNF80"            "ZNF678"           "ZNF749"          
##  [9] "C10orf128"        "TRAV16"           "AC018804.7"       "XXbac-B562F10.11"
## [13] "KIR2DS4"          "AC012318.3"       "GK3P"             "GOT2P3"          
## [17] "LINC00484"        "AC002331.1"       "RP11-305L7.3"     "CTD-2301A4.3"    
## [21] "AC005329.7"       "RP11-473N11.2"    "AP000462.1"       "RP11-317N8.4"    
## [25] "CD248"            "CHRM3-AS2"        "RP1-187B23.1"     "RP11-1000B6.3"

## [1] 20
##  [1] "RP3-324O17.4"  "NSMCE4A"       "NAP1L4P1"      "MTHFD2P7"     
##  [5] "CCDC30"        "TRBV20-1"      "RP11-365H23.1" "AC009506.2"   
##  [9] "CNOT7P1"       "ETF1P2"        "RPL37P23"      "HSPA8P4"      
## [13] "CTC-436P18.1"  "TRAV1-2"       "RP11-511H9.4"  "ANP32AP1"     
## [17] "HMGB1P8"       "RCAN3"         "NEK1"          "ACRC"         
## [21] "FILIP1L"       "TCEAL3"        "AC084018.1"    "LINC00674"    
## [25] "RP11-44N11.1"  "RP11-727F15.9" "QRSL1P3"

## [1] 21
##  [1] "TBPL1"         "DUS4L"         "LOXL3"         "FANCL"        
##  [5] "CD160"         "DHDDS"         "NPC2"          "MCEE"         
##  [9] "MT2A"          "MTHFS"         "GTF2A2"        "USF1"         
## [13] "KIAA1841"      "NFU1"          "MT1E"          "MT1X"         
## [17] "TECPR1"        "TRAV13-1"      "TSTD1"         "RNU2-2P"      
## [21] "UBE2D3P2"      "PET100"        "LINC00493"     "RP11-1012A1.7"
## [25] "MIR142"

## [1] 22
##  [1] "GBA2"     "DERL2"    "ERGIC2"   "RABGGTA"  "ILVBL"    "GOSR2"   
##  [7] "CPD"      "SRM"      "NQO2"     "PPFIA1"   "FADS2"    "MAP3K6"  
## [13] "LIMD1"    "FADS1"    "AMFR"     "FRS2"     "JSRP1"    "PAAF1"   
## [19] "NT5DC1"   "CSNK1A1L" "AGER"     "DKK3"     "EPHA4"    "MPP7"    
## [25] "KLHL25"

## [1] 23
##  [1] "ZNF268"        "FAH"           "ZNF85"         "PECR"         
##  [5] "CCRL2"         "MTERF"         "RRNAD1"        "ADCK5"        
##  [9] "SNHG11"        "GEN1"          "C2orf76"       "ZNF257"       
## [13] "ANAPC10P1"     "RP4-694B14.5"  "PIN4P1"        "RP3-342P20.2" 
## [17] "ZNF826P"       "RPL13P5"       "RN7SL56P"      "TPM3P6"       
## [21] "RNF138P1"      "RP11-889L3.4"  "RP11-844P9.3"  "RP11-638I2.10"
## [25] "CTD-2022H16.3" "CTB-31O20.3"   "RP11-297D21.4"

## [1] 24
##  [1] "DEPDC5"        "PIM2"          "FGFBP2"        "PPCDC"        
##  [5] "DEF8"          "PPOX"          "SPON2"         "FCGR3B"       
##  [9] "CX3CR1"        "ITGAM"         "EIF2S3L"       "FCGR3A"       
## [13] "HIST2H2BF"     "TRAV35"        "RP11-270C12.3" "RPS12P26"     
## [17] "SNRPEP4"       "PTGES3P1"      "FAM200B"       "RP11-81H14.2" 
## [21] "MIR3615"       "LUZP6"         "AC024592.12"   "FRMD4A"       
## [25] "PER1"

## [1] 25
##  [1] "MFAP3"        "TNFRSF9"      "C14orf93"     "SLC25A14"     "SLC11A2"     
##  [6] "MDGA1"        "CD86"         "TNFSF10"      "IFI6"         "DPH6"        
## [11] "IFI44L"       "IFI44"        "LAIR2"        "TRANK1"       "CXCR6"       
## [16] "TRBV9"        "TRAV19"       "CRYGS"        "AC104820.2"   "MIR155HG"    
## [21] "AC092580.4"   "AKAP2"        "CCL3L3"       "RP11-345J4.6" "AMICA1"      
## [26] "MUC20"

## [1] 26
##  [1] "DHPS"         "DHRS12"       "ENO3"         "PRR4"         "PTP4A1"      
##  [6] "MYB"          "CLU"          "NARF"         "MTF2"         "SUV39H2"     
## [11] "SFR1"         "NAA15"        "FBXO22"       "SWI5"         "RMDN1"       
## [16] "BOLA1"        "KATNA1"       "DUSP28"       "ZNF544"       "WDHD1"       
## [21] "RP13-638C3.4" "PMEPA1"       "LEF1"         "NAA16"        "GOLGA8A"

## [1] 27
##  [1] "REXO1"         "ZNF683"        "PDCL3P5"       "AC098614.2"   
##  [5] "NPM1P33"       "CALM2P3"       "TCP1P1"        "AC144530.1"   
##  [9] "CDC42P1"       "RP3-395M20.8"  "RP11-382J24.2" "HSPA8P5"      
## [13] "AC006483.1"    "AC139149.1"    "MIR3661"       "RASGRP2"      
## [17] "CPQ"           "FOXP1"         "C1orf162"      "PLAC8"        
## [21] "FAM102A"       "GPR183"        "SATB1"         "CTD-3092A11.1"
## [25] "RP11-632K20.7" "RP1-313I6.12"

## [1] 28
##  [1] "CHI3L2"         "PUS7L"          "IL10"           "XCL1"          
##  [5] "XCL2"           "CMAHP"          "ZNF720"         "RPSAP54"       
##  [9] "LDHBP2"         "MYL5"           "RP11-2F9.3"     "ARPC3P1"       
## [13] "HNRNPKP2"       "RPL19P21"       "SKP1P1"         "RPS11P5"       
## [17] "EEF1A1P14"      "RAB1C"          "AP000476.1"     "RPS19P3"       
## [21] "AC018462.3"     "EEF1A1P10"      "RP11-179A18.1"  "SRP14-AS1"     
## [25] "RP11-686D22.10" "RP4-739H11.4"   "AP001468.1"

## [1] 29
##  [1] "GZMB"          "GMPR2"         "NHP2P1"        "IL18RAP"      
##  [5] "PTGER2"        "NARG2"         "FUOM"          "TNFAIP8L2"    
##  [9] "HOPX"          "ARHGAP11B"     "TRAV14DV4"     "TRAV29DV5"    
## [13] "RP13-383K5.4"  "KANSL1-AS1"    "RP4-742C19.12" "RP11-350G13.1"
## [17] "RP4-800M22.1"  "PRDX3P1"       "TPI1P2"        "ADH5P4"       
## [21] "RP11-446E9.1"  "GAPDHP60"      "RP11-402J6.3"  "USP30-AS1"    
## [25] "SNORD3A"       "TPMTP1"        "CTB-52I2.4"

## [1] 30
##  [1] "EVI5"          "CDKN3"         "RAD51C"        "STMN1"        
##  [5] "TXNDC17"       "TROAP"         "BORA"          "GEMIN6"       
##  [9] "MITD1"         "PAQR4"         "CDCA7L"        "C11orf82"     
## [13] "ZFYVE19"       "UBE2C"         "TYMS"          "LRTOMT"       
## [17] "SESTD1"        "TCTEX1D2"      "NCKIPSD"       "MDP1"         
## [21] "RP11-378G13.2" "SAPCD1"        "PAICSP4"       "HMGB1P24"     
## [25] "RP11-307P22.1" "RP11-265N6.2"

## [1] 31
##  [1] "P2RX5"    "PRMT7"    "SNX14"    "PWP1"     "ACTL6A"   "PRSS23"  
##  [7] "CXXC1"    "TOMM40L"  "B2M"      "CAPN12"   "AFMID"    "KNTC1"   
## [13] "TMEM120A" "TMSB4X"   "RGPD5"    "PPP2R5C"  "STK17B"   "PPIL4"   
## [19] "BTG1"     "SSH2"     "EPHA1"    "PIP4K2A"  "CXCR5"    "PPP1R10" 
## [25] "MALAT1"

## [1] 32
##  [1] "GLT8D1"   "FAR2"     "SH3BP2"   "KIF4A"    "MAN2B1"   "IFNG"    
##  [7] "SCPEP1"   "DNPEP"    "COQ3"     "TPGS2"    "SPPL2A"   "LPP"     
## [13] "TIFA"     "MX1"      "CCDC51"   "SLC39A13" "PRF1"     "CADM1"   
## [19] "GM2A"     "HIST1H4J" "HLA-DRA"  "NCR3"     "DNASE1"   "ATP5J2"  
## [25] "LMO7"

## [1] 33
##  [1] "ERCC2"        "GTF2H3"       "NPL"          "DRAM2"        "NTAN1"       
##  [6] "C19orf55"     "ZNF613"       "ZNF765"       "RUFY2"        "POTEKP"      
## [11] "TRGV10"       "TRGV5"        "TRAV8-2"      "TRAV24"       "TRGC2"       
## [16] "TSEN15P1"     "TRGV2"        "CTC-338M12.4" "RP11-693N9.2" "ZEB1-AS1"    
## [21] "LINC00426"    "RP4-728D4.2"  "PVT1"         "TRGV7"        "SCML1"       
## [26] "RP13-488H8.1" "SLC7A5P1"

## [1] 34
##  [1] "IMP4"   "RPS17"  "RPL34"  "RPS12"  "RPS25"  "RPL21"  "RPL5"   "RPS10" 
##  [9] "RPS2"   "RPL13A" "RPS27A" "RPL32"  "RPS3A"  "RPL7"   "RPS3"   "RPL30" 
## [17] "RPL9"   "RPS14"  "RPL27A" "RPLP2"  "RPS27"  "RPS23"  "RPS26"  "RPL23A"
## [25] "RPS18"

## [1] 35
##  [1] "OAS1"           "MFSD9"          "IFI27"          "EFCAB4A"       
##  [5] "LCORL"          "ZNF28"          "CPT1B"          "METTL6"        
##  [9] "TRDV1"          "TRDC"           "NAP1L1P3"       "RP11-12M9.4"   
## [13] "PSMA6P1"        "LYPLA1P3"       "CCT8P1"         "HNRNPA1P21"    
## [17] "RP11-288E14.2"  "RBBP4P1"        "GMPSP1"         "RP11-613F22.5" 
## [21] "RBBP4P5"        "RP11-416A17.6"  "TRAV30"         "RP11-265N6.3"  
## [25] "ATP5A1P3"       "ACTBP9"         "RP11-1094M14.7"

## [1] 36
##  [1] "UBXN8"   "SLC12A4" "YIPF2"   "SLC41A2" "LINS"    "MFSD6"   "TMEM237"
##  [8] "ALG8"    "C2orf44" "GLB1"    "LGALS9C" "SLC38A9" "YIPF6"   "DBNDD2" 
## [15] "PIK3IP1" "SLC38A1" "UAP1"    "YPEL5"   "NBPF14"  "SLC38A2" "EPB41"  
## [22] "NBPF10"  "RGPD6"   "CD55"    "FAM169A" "NBPF16"

## [1] 37
##  [1] "ICA1"     "MED24"    "NSMAF"    "ZNRD1"    "REC8"     "POLR2I"  
##  [7] "LIG1"     "POLE4"    "EPAS1"    "AKR1A1"   "UXT"      "CHTF18"  
## [13] "SLC2A8"   "TBCD"     "PPP4R1"   "MED11"    "POLR2H"   "IL15"    
## [19] "MZT2A"    "POLE"     "PPARA"    "CHAMP1"   "STK19"    "C17orf89"
## [25] "MRPL33"

## [1] 38
##  [1] "RABGAP1"       "CD84"          "RGS1"          "HAVCR1"       
##  [5] "EGR2"          "TEP1"          "TMEM19"        "FAM50B"       
##  [9] "NAALADL1"      "THAP2"         "SH2D1B"        "SGMS1"        
## [13] "TRAV21"        "MAPKAPK5-AS1"  "RP11-271C24.3" "WDR65"        
## [17] "CTB-4E7.1"     "CTD-2521M24.9" "RP11-705C15.5" "ABLIM1"       
## [21] "RIMKLB"        "PHLDB3"        "LDOC1"         "KLRK1"        
## [25] "HCG4P5"        "ATP1B3-AS1"

## [1] 39
##  [1] "NDUFAF7"  "ACADVL"   "NFE2L1"   "MTIF2"    "MRPS33"   "NDUFAF5" 
##  [7] "CDK5RAP1" "EARS2"    "MTFMT"    "THG1L"    "ATPAF1"   "C6orf203"
## [13] "MTO1"     "MRRF"     "CETN3"    "MALSU1"   "MRPL55"   "NDUFS4"  
## [19] "C7orf55"  "MRPL48"   "ZNF782"   "OXLD1"    "CCDC109B" "TSHZ2"

## [1] 40
##  [1] "MBNL3"         "DDX49"         "PDZD2"         "RPSAP47"      
##  [5] "RP1-102E24.1"  "UBA52P6"       "RP1-182O16.1"  "RPL19P12"     
##  [9] "RP11-350G8.3"  "RPSAP4"        "MGAT4A"        "HECA"         
## [13] "C1orf63"       "PLXDC1"        "TRABD2A"       "RPS3AP26"     
## [17] "AC021593.1"    "LTB"           "RPL10P3"       "RPL9P8"       
## [21] "RPL3P4"        "RPL9P9"        "RPL9P7"        "RP11-285F7.2" 
## [25] "MTND4P12"      "ZNF10"         "RP11-335G20.7"

# For every gene set that has stored enrichment results, print the set
# label, the genes of the set and the stored result tables.
for(n1 in names(goseq_res)){
  # Recover the index of the gene set from its "set_<k>" label.
  k = as.numeric(gsub("set_", "", n1))
  print(n1)
  print(gene_sets[[k]])
  print(goseq_res[[n1]])

}
## [1] "set_2"
##  [1] "IL4R"        "UTP6"        "ACVR1"       "SMUG1"       "GLUL"       
##  [6] "GNG4"        "ZNF816"      "RPS27AP2"    "CTB-33G10.1" "TXK"        
## [11] "ARRDC2"      "RASGRF2"     "EGR1"        "ACVR2A"      "FOSB"       
## [16] "JUND"        "SKIL"        "SORL1"       "SGSM2"       "GNAQ"       
## [21] "LDLRAP1"     "DHRS3"       "TC2N"        "TMEM150A"    "DNHD1"      
## $go_bp
##             category over_represented_pvalue under_represented_pvalue
## 3547 response to bmp            5.727704e-06                0.9999999
##      numDEInCat numInCat        FDR
## 3547          5       11 0.02340913
## 
## [1] "set_4"
##  [1] "ID3"            "PARP2"          "MMS22L"         "PLIN2"         
##  [5] "ATF3"           "HIST1H2BC"      "PLA2G6"         "HIST2H2AA4"    
##  [9] "HIST1H2BN"      "HIST1H3G"       "RP11-386G11.10" "IFRD1"         
## [13] "RGCC"           "NR4A3"          "RASGEF1B"       "NR4A2"         
## [17] "TIPARP"         "PRMT10"         "PFKFB3"         "RBKS"          
## [21] "CHD2"           "ANKRD37"        "MTND1P23"       "PSMD6-AS2"     
## [25] "KCNQ1OT1"       "RP11-434H6.7"  
## $reactome
##                                                                                          category
## 786         runx1 regulates genes involved in megakaryocyte differentiation and platelet function
## 10  activated pkn1 stimulates transcription of ar androgen receptor regulated genes klk2 and klk3
## 63                                       assembly of the orc complex at the origin of replication
## 122                         chromatin modifications during the maternal to zygotic transition mzt
## 209                                                                               dna methylation
## 249                                   ercc6 csb and ehmt2 g9a positively regulate rrna expression
## 746                                                                     rho gtpases activate pkns
## 879                                                    sirt1 negatively regulates rrna expression
## 639                                                              prc2 methylates histones and dna
## 285                                     formation of the beta catenin tcf transactivating complex
##     over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 786            4.313170e-06                0.9999999          5       13
## 10             2.519897e-05                0.9999996          4        9
## 63             2.519897e-05                0.9999996          4        9
## 122            2.519897e-05                0.9999996          4        9
## 209            2.519897e-05                0.9999996          4        9
## 249            2.519897e-05                0.9999996          4        9
## 746            2.519897e-05                0.9999996          4        9
## 879            2.519897e-05                0.9999996          4        9
## 639            4.141248e-05                0.9999991          4       10
## 285            6.416829e-05                0.9999984          4       11
##             FDR
## 786 0.003219168
## 10  0.003219168
## 63  0.003219168
## 122 0.003219168
## 209 0.003219168
## 249 0.003219168
## 746 0.003219168
## 879 0.003219168
## 639 0.004702617
## 285 0.005961818
## 
## $immune
##                                           category over_represented_pvalue
## 1235 gse17974 0h vs 0.5h in vitro act cd4 tcell dn            2.969641e-07
##      under_represented_pvalue numDEInCat numInCat        FDR
## 1235                        1          6       15 0.00151422
## 
## [1] "set_8"
##  [1] "TRPT1"         "TRBV4-2"       "TRAV12-2"      "RP11-383G10.3"
##  [5] "RPS19P1"       "NPM1P19"       "Z97634.3"      "RP11-32B5.1"  
##  [9] "NDUFA9P1"      "RP11-33B1.1"   "HNRNPA1P50"    "DGKA"         
## [13] "TCF7"          "CCR7"          "PCNXL2"        "CDC42EP3"     
## [17] "ZBTB20"        "DENND5A"       "POU6F1"        "SELL"         
## [21] "RPS26P6"       "RP11-91K8.4"   "WHAMMP2"       "TMEM9B-AS1"   
## [25] "RP11-747H7.3"  "CTD-2017D11.1" "RP11-51J9.5"  
## $immune
##                                                            category
## 56                              goldrath eff vs memory cd8 tcell dn
## 3792         gse40274 ctrl vs eos transduced activated cd4 tcell dn
## 4851                              gse9650 naive vs eff cd8 tcell up
## 4974                           kaech naive vs day8 eff cd8 tcell up
## 2082                   gse23568 ctrl vs id3 transduced cd8 tcell up
## 4086 gse43863 day6 eff vs day150 mem ly6c int cxcr5pos cd4 tcell dn
## 4844                        gse9650 effector vs memory cd8 tcell dn
## 4969                          kaech day8 eff vs memory cd8 tcell dn
## 2079                    gse23568 ctrl transduced vs wt cd8 tcell dn
## 2746  gse2935 uv inactivated vs live sendai virus inf macrophage up
##      over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 56              5.535430e-06                0.9999999          5       23
## 3792            8.549934e-06                0.9999998          5       25
## 4851            1.261357e-05                0.9999996          5       27
## 4974            1.275095e-05                0.9999996          5       27
## 2082            2.443384e-05                0.9999995          4       15
## 4086            3.363985e-05                0.9999992          4       16
## 4844            4.275140e-05                0.9999982          5       34
## 4969            4.302329e-05                0.9999982          5       34
## 2079            5.471493e-05                0.9999985          4       18
## 2746            5.565429e-05                0.9999985          4       18
##             FDR
## 56   0.01625427
## 3792 0.01625427
## 4851 0.01625427
## 4974 0.01625427
## 2082 0.02491763
## 4086 0.02742197
## 4844 0.02742197
## 4969 0.02742197
## 2079 0.02837812
## 2746 0.02837812
## 
## [1] "set_10"
##  [1] "SMG6"      "DUSP12"    "MICALL1"   "CNOT2"     "RCOR3"     "MTFR1L"   
##  [7] "CCDC77"    "METTL8"    "FOPNL"     "CASP1"     "KSR1"      "VPS37A"   
## [13] "TMEM208"   "TMEM126A"  "CEP63"     "CNOT10"    "CEP57L1"   "RN7SL749P"
## [19] "TNRC6C"    "CLDND1"    "TNRC6B"    "CNOT6L"    "BTG2"      "RALGAPA1" 
## [25] "PLA2G4B"  
## $reactome
##                                                                                                             category
## 981                                                                               transcriptional regulation by tp53
## 952 tp53 regulates transcription of additional cell cycle genes whose exact role in the p53 pathway remain uncertain
## 954                                                                 tp53 regulates transcription of cell cycle genes
## 172                                                                                            deadenylation of mrna
## 171                                                                               deadenylation dependent mrna decay
##     over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 981            2.090452e-06                0.9999999          7       35
## 952            2.226282e-06                1.0000000          4        6
## 954            1.011921e-05                0.9999999          4        8
## 172            3.489845e-05                0.9999998          3        4
## 171            1.709831e-04                0.9999977          3        6
##             FDR
## 981 0.001137630
## 952 0.001137630
## 954 0.003447277
## 172 0.008916553
## 171 0.034948947
## 
## $go_bp
##                                                                      category
## 1917                          nuclear transcribed mrna poly a tail shortening
## 1912                               nuclear transcribed mrna catabolic process
## 1913 nuclear transcribed mrna catabolic process deadenylation dependent decay
## 3702                                                      rna destabilization
## 1372                                                   mrna catabolic process
## 2380                            positive regulation of mrna metabolic process
## 3201                                     regulation of mrna catabolic process
## 3700                                                    rna catabolic process
## 3202                                     regulation of mrna metabolic process
## 1456                           negative regulation of amide metabolic process
##      over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 1917            2.977880e-09                1.0000000          6        8
## 1912            1.120755e-08                1.0000000          7       15
## 1913            2.182607e-08                1.0000000          6       10
## 3702            2.834440e-07                1.0000000          6       14
## 1372            5.185335e-07                1.0000000          7       24
## 2380            7.335956e-07                1.0000000          6       16
## 3201            1.124232e-06                1.0000000          6       17
## 3700            2.188583e-06                0.9999999          7       29
## 3202            4.655463e-06                0.9999998          6       21
## 1456            3.558517e-05                0.9999979          6       29
##               FDR
## 1917 1.217060e-05
## 1912 2.290263e-05
## 1913 2.973439e-05
## 3702 2.896089e-04
## 1372 4.238493e-04
## 2380 4.997009e-04
## 3201 6.563908e-04
## 3700 1.118092e-03
## 3202 2.114097e-03
## 1456 1.454366e-02
## 
## [1] "set_11"
##  [1] "POLA2"      "USP28"      "WDR3"       "WIPI1"      "PIK3C3"    
##  [6] "ULK2"       "WDR13"      "IFIT3"      "IFIT2"      "L3HYPDH"   
## [11] "RSAD2"      "BCAS3"      "PELP1"      "BAP1"       "WDR41"     
## [16] "PHAX"       "ATG13"      "IFIT1"      "PIK3R4"     "MBD5"      
## [21] "CATSPER2P1" "RAB12"      "C9orf72"    "FBXO32"    
## $go_bp
##                                              category over_represented_pvalue
## 2830                          regulation of autophagy            1.512986e-06
## 154                        autophagosome organization            2.206566e-06
## 4025                             vacuole organization            6.714553e-06
## 1173                                   macroautophagy            8.685364e-06
## 2684 protein localization to phagophore assembly site            1.772104e-05
## 3639                           response to starvation            6.471983e-05
## 1924                                      nucleophagy            6.891717e-05
## 3142                     regulation of macroautophagy            8.208376e-05
## 1171                              lysosomal transport            8.238985e-05
##      under_represented_pvalue numDEInCat numInCat         FDR
## 2830                0.9999999          7       25 0.004509117
## 154                 1.0000000          5       10 0.004509117
## 4025                0.9999998          5       12 0.008874270
## 1173                0.9999996          6       21 0.008874270
## 2684                1.0000000          3        3 0.014485176
## 3639                0.9999969          5       18 0.037414148
## 1924                0.9999996          3        4 0.037414148
## 3142                0.9999977          4       10 0.037414148
## 1171                0.9999977          4       10 0.037414148
## 
## [1] "set_18"
##  [1] "MAD1L1"   "RNF10"    "PTPN18"   "GABPB1"   "SMC4"     "C3orf14" 
##  [7] "STK25"    "CKS2"     "CCNB1"    "PMAIP1"   "HIRIP3"   "CCNB2"   
## [13] "ANAPC10"  "CTSB"     "TBCA"     "RRM2"     "ASB8"     "EDARADD" 
## [19] "TDRD7"    "HIST1H4C" "DDX3Y"    "FAM117B"  "NPAT"     "PLK3"    
## [25] "TSC22D2" 
## $reactome
##                                     category over_represented_pvalue
## 138 condensation of prometaphase chromosomes            1.472594e-05
##     under_represented_pvalue numDEInCat numInCat        FDR
## 138                        1          3        3 0.01504991
## 
## [1] "set_21"
##  [1] "TBPL1"         "DUS4L"         "LOXL3"         "FANCL"        
##  [5] "CD160"         "DHDDS"         "NPC2"          "MCEE"         
##  [9] "MT2A"          "MTHFS"         "GTF2A2"        "USF1"         
## [13] "KIAA1841"      "NFU1"          "MT1E"          "MT1X"         
## [17] "TECPR1"        "TRAV13-1"      "TSTD1"         "RNU2-2P"      
## [21] "UBE2D3P2"      "PET100"        "LINC00493"     "RP11-1012A1.7"
## [25] "MIR142"       
## $reactome
##                         category over_represented_pvalue
## 485 metallothioneins bind metals            9.853714e-06
## 737       response to metal ions            9.853714e-06
##     under_represented_pvalue numDEInCat numInCat         FDR
## 485                        1          3        3 0.005035248
## 737                        1          3        3 0.005035248
## 
## $go_bp
##                                  category over_represented_pvalue
## 353      cellular response to cadmium ion            1.502551e-05
## 358       cellular response to copper ion            1.502551e-05
## 564          detoxification of copper ion            1.502551e-05
## 565  detoxification of inorganic compound            1.502551e-05
## 3548              response to cadmium ion            1.502551e-05
## 3555               response to copper ion            1.502551e-05
## 1055   intracellular zinc ion homeostasis            5.835770e-05
## 420         cellular response to zinc ion            5.864869e-05
##      under_represented_pvalue numDEInCat numInCat        FDR
## 353                 1.0000000          3        3 0.01023488
## 358                 1.0000000          3        3 0.01023488
## 564                 1.0000000          3        3 0.01023488
## 565                 1.0000000          3        3 0.01023488
## 3548                1.0000000          3        3 0.01023488
## 3555                1.0000000          3        3 0.01023488
## 1055                0.9999997          3        4 0.02996215
## 420                 0.9999997          3        4 0.02996215
## 
## [1] "set_25"
##  [1] "MFAP3"        "TNFRSF9"      "C14orf93"     "SLC25A14"     "SLC11A2"     
##  [6] "MDGA1"        "CD86"         "TNFSF10"      "IFI6"         "DPH6"        
## [11] "IFI44L"       "IFI44"        "LAIR2"        "TRANK1"       "CXCR6"       
## [16] "TRBV9"        "TRAV19"       "CRYGS"        "AC104820.2"   "MIR155HG"    
## [21] "AC092580.4"   "AKAP2"        "CCL3L3"       "RP11-345J4.6" "AMICA1"      
## [26] "MUC20"       
## $immune
##                                         category over_represented_pvalue
## 4555 gse6269 healthy vs staph aureus inf pbmc up            4.004366e-06
## 2427            gse2706 2h vs 8h r848 stim dc dn            1.417185e-05
## 1337   gse18791 ctrl vs newcastle virus dc 6h dn            2.301492e-05
##      under_represented_pvalue numDEInCat numInCat        FDR
## 4555                0.9999998          6       22 0.02041826
## 2427                0.9999995          5       16 0.03613114
## 1337                0.9999987          6       29 0.03911769
## 
## [1] "set_27"
##  [1] "REXO1"         "ZNF683"        "PDCL3P5"       "AC098614.2"   
##  [5] "NPM1P33"       "CALM2P3"       "TCP1P1"        "AC144530.1"   
##  [9] "CDC42P1"       "RP3-395M20.8"  "RP11-382J24.2" "HSPA8P5"      
## [13] "AC006483.1"    "AC139149.1"    "MIR3661"       "RASGRP2"      
## [17] "CPQ"           "FOXP1"         "C1orf162"      "PLAC8"        
## [21] "FAM102A"       "GPR183"        "SATB1"         "CTD-3092A11.1"
## [25] "RP11-632K20.7" "RP1-313I6.12" 
## $immune
##                                                               category
## 595                gse14699 naive vs deletional tolerance cd8 tcell dn
## 1580                      gse20727 h2o2 vs ros inhibitor treated dc up
## 3848 gse40274 lef1 vs foxp3 and lef1 transduced activated cd4 tcell dn
##      over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 595             2.973401e-08                1.0000000          6       25
## 1580            1.997739e-05                0.9999996          4       19
## 3848            2.494993e-05                0.9999995          4       20
##               FDR
## 595  0.0001516137
## 1580 0.0424065620
## 3848 0.0424065620
## 
## [1] "set_30"
##  [1] "EVI5"          "CDKN3"         "RAD51C"        "STMN1"        
##  [5] "TXNDC17"       "TROAP"         "BORA"          "GEMIN6"       
##  [9] "MITD1"         "PAQR4"         "CDCA7L"        "C11orf82"     
## [13] "ZFYVE19"       "UBE2C"         "TYMS"          "LRTOMT"       
## [17] "SESTD1"        "TCTEX1D2"      "NCKIPSD"       "MDP1"         
## [21] "RP11-378G13.2" "SAPCD1"        "PAICSP4"       "HMGB1P24"     
## [25] "RP11-307P22.1" "RP11-265N6.2" 
## $immune
##                                                                              category
## 4962 howard nk cell inact monov influenza a indonesia 05 2005 h5n1 age 18 49yo 3dy up
##      over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 4962            7.935581e-07                        1          5       10
##              FDR
## 4962 0.004046353
## 
## [1] "set_32"
##  [1] "GLT8D1"   "FAR2"     "SH3BP2"   "KIF4A"    "MAN2B1"   "IFNG"    
##  [7] "SCPEP1"   "DNPEP"    "COQ3"     "TPGS2"    "SPPL2A"   "LPP"     
## [13] "TIFA"     "MX1"      "CCDC51"   "SLC39A13" "PRF1"     "CADM1"   
## [19] "GM2A"     "HIST1H4J" "HLA-DRA"  "NCR3"     "DNASE1"   "ATP5J2"  
## [25] "LMO7"    
## $go_bp
##                                 category over_represented_pvalue
## 2895          regulation of cell killing            4.566378e-06
## 2172 positive regulation of cell killing            7.711744e-06
## 314                         cell killing            2.617120e-05
##      under_represented_pvalue numDEInCat numInCat        FDR
## 2895                0.9999998          6       22 0.01575895
## 2172                0.9999998          5       14 0.01575895
## 314                 0.9999985          6       29 0.03565390
## 
## $immune
##                                               category over_represented_pvalue
## 3539 gse37605 treg vs tconv c57bl6 foxp3 fusion gfp dn            1.925098e-06
##      under_represented_pvalue numDEInCat numInCat         FDR
## 3539                        1          5       10 0.009816073
## 
## [1] "set_33"
##  [1] "ERCC2"        "GTF2H3"       "NPL"          "DRAM2"        "NTAN1"       
##  [6] "C19orf55"     "ZNF613"       "ZNF765"       "RUFY2"        "POTEKP"      
## [11] "TRGV10"       "TRGV5"        "TRAV8-2"      "TRAV24"       "TRGC2"       
## [16] "TSEN15P1"     "TRGV2"        "CTC-338M12.4" "RP11-693N9.2" "ZEB1-AS1"    
## [21] "LINC00426"    "RP4-728D4.2"  "PVT1"         "TRGV7"        "SCML1"       
## [26] "RP13-488H8.1" "SLC7A5P1"    
## $reactome
##                                       category over_represented_pvalue
## 283 formation of rna pol ii elongation complex            0.0001637134
## 286  formation of the early elongation complex            0.0001637134
## 370               hiv transcription elongation            0.0001637134
## 506                               mrna capping            0.0001637134
## 766  rna polymerase i transcription initiation            0.0001681170
## 767 rna polymerase i transcription termination            0.0001681170
## 371               hiv transcription initiation            0.0003302842
## 768 rna polymerase ii pre transcription events            0.0003302842
## 970            transcription of the hiv genome            0.0003302842
## 281    formation of incision complex in gg ner            0.0003557590
##     under_represented_pvalue numDEInCat numInCat        FDR
## 283                0.9999997          2        5 0.02863592
## 286                0.9999997          2        5 0.02863592
## 370                0.9999997          2        5 0.02863592
## 506                0.9999997          2        5 0.02863592
## 766                0.9999997          2        5 0.02863592
## 767                0.9999997          2        5 0.02863592
## 371                0.9999991          2        7 0.03635857
## 768                0.9999991          2        7 0.03635857
## 970                0.9999991          2        7 0.03635857
## 281                0.9999990          2        7 0.03635857
## 
## [1] "set_34"
##  [1] "IMP4"   "RPS17"  "RPL34"  "RPS12"  "RPS25"  "RPL21"  "RPL5"   "RPS10" 
##  [9] "RPS2"   "RPL13A" "RPS27A" "RPL32"  "RPS3A"  "RPL7"   "RPS3"   "RPL30" 
## [17] "RPL9"   "RPS14"  "RPL27A" "RPLP2"  "RPS27"  "RPS23"  "RPS26"  "RPL23A"
## [25] "RPS18" 
## $reactome
##                                                                                                  category
## 21  activation of the mrna upon binding of the cap binding complex and eifs and subsequent binding to 43s
## 117                                                                       cellular response to starvation
## 255                                                                     eukaryotic translation elongation
## 256                                                                     eukaryotic translation initiation
## 387                                                                                   influenza infection
## 471                                                             metabolism of amino acids and derivatives
## 547                                                                           nonsense mediated decay nmd
## 691                                                           regulation of expression of slits and robos
## 734                                                     response of eif2ak4 gcn2 to amino acid deficiency
## 783                                                                                       rrna processing
##     over_represented_pvalue under_represented_pvalue numDEInCat numInCat FDR
## 21                        0                        1         13       15   0
## 117                       0                        1         24       33   0
## 255                       0                        1         24       31   0
## 256                       0                        1         24       30   0
## 387                       0                        1         24       37   0
## 471                       0                        1         24       42   0
## 547                       0                        1         24       31   0
## 691                       0                        1         24       31   0
## 734                       0                        1         24       30   0
## 783                       0                        1         25       40   0
## 
## $go_bp
##                                category over_represented_pvalue
## 515             cytoplasmic translation            0.000000e+00
## 3693                ribosome biogenesis            1.928974e-09
## 3691 ribosomal small subunit biogenesis            7.669988e-09
## 3718             rrna metabolic process            1.025484e-05
## 3692                  ribosome assembly            2.168596e-05
##      under_represented_pvalue numDEInCat numInCat          FDR
## 515                 1.0000000         24       33 0.000000e+00
## 3693                1.0000000         11       35 3.941858e-06
## 3691                1.0000000          8       16 1.044908e-05
## 3718                0.9999994          7       26 1.047789e-02
## 3692                0.9999997          4        6 1.772611e-02
## 
## $immune
##                                                                  category
## 54                        gaucher pbmc yf vax stamaril unknown age 7dy dn
## 463                                 gse14000 translated rna vs mrna dc dn
## 1628                      gse2124 ctrl vs lymphotoxin beta treated mln up
## 1944                                gse22886 naive bcell vs neutrophil up
## 1974                                        gse22886 naive tcell vs dc up
## 2104               gse2405 0h vs 24h a phagocytophilum stim neutrophil up
## 2109                gse2405 0h vs 9h a phagocytophilum stim neutrophil dn
## 3108                           gse34205 healthy vs flu inf infant pbmc up
## 4014 gse41978 id2 ko vs id2 ko and bim ko klrg1 low effector cd8 tcell dn
## 4016                     gse41978 klrg1 high vs low effector cd8 tcell dn
##      over_represented_pvalue under_represented_pvalue numDEInCat numInCat FDR
## 54                         0                        1         14       32   0
## 463                        0                        1         12       26   0
## 1628                       0                        1         10       19   0
## 1944                       0                        1         11       25   0
## 1974                       0                        1         14       33   0
## 2104                       0                        1         19       40   0
## 2109                       0                        1         19       37   0
## 3108                       0                        1         12       25   0
## 4014                       0                        1         12       32   0
## 4016                       0                        1         14       30   0
## 
## [1] "set_36"
##  [1] "UBXN8"   "SLC12A4" "YIPF2"   "SLC41A2" "LINS"    "MFSD6"   "TMEM237"
##  [8] "ALG8"    "C2orf44" "GLB1"    "LGALS9C" "SLC38A9" "YIPF6"   "DBNDD2" 
## [15] "PIK3IP1" "SLC38A1" "UAP1"    "YPEL5"   "NBPF14"  "SLC38A2" "EPB41"  
## [22] "NBPF10"  "RGPD6"   "CD55"    "FAM169A" "NBPF16" 
## $reactome
##                                                                category
## 880                                slc mediated transmembrane transport
## 997 transport of inorganic cations anions and amino acids oligopeptides
##     over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 880            1.686105e-05                0.9999997          4        9
## 997            7.902577e-05                0.9999993          3        5
##            FDR
## 880 0.01723200
## 997 0.04038217
## 
## $go_bp
##                category over_represented_pvalue under_represented_pvalue
## 864 glutamine transport              1.0726e-05                        1
##     numDEInCat numInCat        FDR
## 864          3        3 0.04383717
## 
## [1] "set_39"
##  [1] "NDUFAF7"  "ACADVL"   "NFE2L1"   "MTIF2"    "MRPS33"   "NDUFAF5" 
##  [7] "CDK5RAP1" "EARS2"    "MTFMT"    "THG1L"    "ATPAF1"   "C6orf203"
## [13] "MTO1"     "MRRF"     "CETN3"    "MALSU1"   "MRPL55"   "NDUFS4"  
## [19] "C7orf55"  "MRPL48"   "ZNF782"   "OXLD1"    "CCDC109B" "TSHZ2"   
## $reactome
##                      category over_represented_pvalue under_represented_pvalue
## 495 mitochondrial translation            5.271263e-09                1.0000000
## 986               translation            3.177704e-05                0.9999979
##     numDEInCat numInCat          FDR
## 495          6        9 5.387230e-06
## 986          7       46 1.623806e-02
## 
## $go_bp
##                                 category over_represented_pvalue
## 1301       mitochondrial gene expression            0.000000e+00
## 1311           mitochondrial translation            3.526225e-09
## 1307 mitochondrial rna metabolic process            9.799058e-06
##      under_represented_pvalue numDEInCat numInCat          FDR
## 1301                1.0000000          9       16 0.000000e+00
## 1311                1.0000000          7       13 7.205842e-06
## 1307                0.9999999          4        7 1.334958e-02
## 
## $immune
##                                                                        category
## 4823 gse9316 cd4 tcell balbc vs th17 enri cd4 tcell skg pma iono stim fr4neg up
##      over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 4823            9.056117e-06                0.9999997          5       14
##             FDR
## 4823 0.04617714
# Write the goseq enrichment results (goseq_res) to an RDS file under
# output/; file_tag (cell type name and graph weight) is embedded in the
# file name.
saveRDS(
  object = goseq_res,
  file   = sprintf("output/gene_set_enrichments_%s.RDS", file_tag)
)

Session information

# Run garbage collection and print the resulting memory-usage summary.
gc()
##            used  (Mb) gc trigger  (Mb) limit (Mb) max used  (Mb)
## Ncells  8958692 478.5   17165476 916.8         NA 17165476 916.8
## Vcells 19171056 146.3   57891911 441.7      65536 78485130 598.8
# Print the R version, platform, locale, and attached/loaded package versions
# for this run.
sessionInfo()
## R version 4.2.3 (2023-03-15)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.4.1
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] TxDb.Hsapiens.UCSC.hg38.knownGene_3.16.0
##  [2] GenomicFeatures_1.50.4                  
##  [3] GenomicRanges_1.50.2                    
##  [4] GenomeInfoDb_1.34.9                     
##  [5] org.Hs.eg.db_3.16.0                     
##  [6] AnnotationDbi_1.60.2                    
##  [7] IRanges_2.32.0                          
##  [8] S4Vectors_0.36.2                        
##  [9] Biobase_2.58.0                          
## [10] BiocGenerics_0.44.0                     
## [11] goseq_1.50.0                            
## [12] geneLenDataBase_1.34.0                  
## [13] BiasedUrn_2.0.10                        
## [14] fgsea_1.24.0                            
## [15] biomaRt_2.54.1                          
## [16] limma_3.54.2                            
## [17] tidyr_1.3.0                             
## [18] ggpubr_0.6.0                            
## [19] ggplot2_3.4.2                           
## [20] data.table_1.14.8                       
## 
## loaded via a namespace (and not attached):
##  [1] nlme_3.1-162                matrixStats_1.0.0          
##  [3] bitops_1.0-7                bit64_4.0.5                
##  [5] filelock_1.0.2              progress_1.2.2             
##  [7] httr_1.4.6                  tools_4.2.3                
##  [9] backports_1.4.1             bslib_0.4.2                
## [11] utf8_1.2.3                  R6_2.5.1                   
## [13] mgcv_1.8-42                 DBI_1.1.3                  
## [15] colorspace_2.1-0            withr_2.5.0                
## [17] tidyselect_1.2.0            prettyunits_1.1.1          
## [19] bit_4.0.5                   curl_5.0.1                 
## [21] compiler_4.2.3              cli_3.6.1                  
## [23] xml2_1.3.4                  DelayedArray_0.24.0        
## [25] rtracklayer_1.58.0          sass_0.4.5                 
## [27] scales_1.2.1                rappdirs_0.3.3             
## [29] Rsamtools_2.14.0            stringr_1.5.0              
## [31] digest_0.6.31               rmarkdown_2.21             
## [33] XVector_0.38.0              pkgconfig_2.0.3            
## [35] htmltools_0.5.5             MatrixGenerics_1.10.0      
## [37] dbplyr_2.3.2                fastmap_1.1.1              
## [39] rlang_1.1.0                 rstudioapi_0.14            
## [41] RSQLite_2.3.1               BiocIO_1.8.0               
## [43] jquerylib_0.1.4             generics_0.1.3             
## [45] jsonlite_1.8.4              BiocParallel_1.32.6        
## [47] dplyr_1.1.2                 car_3.1-2                  
## [49] RCurl_1.98-1.12             magrittr_2.0.3             
## [51] GO.db_3.16.0                GenomeInfoDbData_1.2.9     
## [53] Matrix_1.6-4                Rcpp_1.0.10                
## [55] munsell_0.5.0               fansi_1.0.4                
## [57] abind_1.4-5                 lifecycle_1.0.3            
## [59] stringi_1.7.12              yaml_2.3.7                 
## [61] carData_3.0-5               SummarizedExperiment_1.28.0
## [63] zlibbioc_1.44.0             BiocFileCache_2.6.1        
## [65] grid_4.2.3                  blob_1.2.4                 
## [67] parallel_4.2.3              crayon_1.5.2               
## [69] lattice_0.20-45             splines_4.2.3              
## [71] Biostrings_2.66.0           cowplot_1.1.1              
## [73] hms_1.1.3                   KEGGREST_1.38.0            
## [75] knitr_1.44                  pillar_1.9.0               
## [77] rjson_0.2.21                ggsignif_0.6.4             
## [79] codetools_0.2-19            fastmatch_1.1-3            
## [81] XML_3.99-0.14               glue_1.6.2                 
## [83] evaluate_0.20               png_0.1-8                  
## [85] vctrs_0.6.2                 gtable_0.3.3               
## [87] purrr_1.0.1                 cachem_1.0.7               
## [89] xfun_0.39                   broom_1.0.4                
## [91] restfulr_0.0.15             rstatix_0.7.2              
## [93] tibble_3.2.1                GenomicAlignments_1.34.1   
## [95] memoise_2.0.1